The Dataset

The dataset considered is the Steam Video Games Dataset. This dataset is a list of user behaviors, with columns: user-id, game-title, behavior-name, value. The behaviors included are ‘purchase’ and ‘play’. The value indicates the degree to which the behavior was performed - in the case of ‘purchase’ the value is always 1, and in the case of ‘play’ the value represents the number of hours the user has played the game.

# Load the raw Steam behavior log. The file has no header row, so column names
# are supplied here; read.csv()'s default check.names = TRUE turns the hyphens
# into dots, which is why the columns are referred to as user.id, game.title
# and behavior.name in the rest of the analysis. The fifth column (labelled
# "unknown" here) is dropped right away.
raw_data <- read.csv(
  "steam-200k.csv",
  header = FALSE,
  col.names = c("user-id", "game-title", "behavior-name", "value", "unknown")
) %>%
  as_tibble() %>%
  select(-unknown)
head(raw_data)

The most played games

# One row per (user, game) "play" record with:
#   time       - hours the user played that game
#   total_time - hours the user played across all of their games
#   perc_time  - share of the user's total spent on that game
play_time <- raw_data %>%
  filter(behavior.name == "play") %>%
  group_by(user.id) %>%
  mutate(
    total_time = sum(value),
    perc_time = value / total_time
  ) %>%
  ungroup() %>%
  select(user.id, game.title, time = value, perc_time, total_time)

# Total hours played per game across all users, ranked from most to least
# played (rnum == 1 is the most played game).
# The per-row `time` is what gets summed: `total_time` in play_time is each
# user's overall total across ALL their games, so summing it per game would
# credit a game with hours its players spent on other titles.
games_play_time <- play_time %>%
  group_by(game.title) %>%
  summarize(total_time = sum(time)) %>%
  ungroup() %>%
  arrange(desc(total_time)) %>%
  mutate(rnum = row_number())
# Keep the first considered_n ranked rows; the last of them is then
# overwritten with an "Others" bucket holding the combined total of every
# game ranked considered_n or lower.
considered_n <- 21
to_plot <- filter(games_play_time, rnum <= considered_n)
to_plot[considered_n, ]$game.title <- "Others"
to_plot[considered_n, ]$total_time <- games_play_time %>%
  filter(rnum >= considered_n) %>%
  pull(total_time) %>%
  sum()

# TODO add a button to switch from one chart to the other
# TODO add labels on top of the columns
# TODO can we draw a "broken" column to show that it would be much taller,
#      and write the off-scale value on top of it?

# Bar chart of the ranked games including the "Others" bucket.
ggplot(to_plot, aes(x = rnum, y = total_time)) +
  geom_col()

# Same chart without the "Others" bucket (the row ranked considered_n).
ggplot(filter(to_plot, rnum != considered_n), aes(x = rnum, y = total_time)) +
  geom_col()

# Build the pie-chart rows for one user from user_play_time (defined outside
# this chunk; it must provide game.title, perc_time and time).
# NOTE(review): the running share follows the existing row order of
# user_play_time -- presumably already sorted by decreasing perc_time; confirm.
to_plot <- user_play_time %>%
  select(game.title, perc_time, time) %>%
  mutate(cum_perc_time = cumsum(perc_time)) %>%
  arrange(cum_perc_time)

# With more than 10 games, keep only the leading games whose running share
# stays within perc_to_show and fold everything else into an "Others" row.
if (nrow(user_play_time) > 10) {
  perc_to_show <- 0.90
  to_plot <- to_plot %>%
    filter(cum_perc_time <= perc_to_show)

  # sum() rather than max(cum_perc_time): when the very first game already
  # exceeds perc_to_show nothing is kept, and max() of an empty vector is -Inf,
  # which would give "Others" an infinite share. sum() yields 0 in that case,
  # so "Others" correctly becomes 100%.
  to_plot <- to_plot %>%
    add_row(
      game.title = "Others",
      perc_time = 1.0 - sum(to_plot$perc_time),
      time = sum(user_play_time$time) - sum(to_plot$time),
      cum_perc_time = 1.0
    )
}

# If the player has played a great many games, spreading their time across
# them, there are still too many rows for a readable pie: keep only the 7
# games with the largest share and put all the rest under "Others".
if (nrow(to_plot) > 10) {
  too_big_to_plot <- to_plot %>%
    # Drop the aggregate row added by the previous step: otherwise it is ranked
    # like a game, can take one of the 7 slots, and the chart ends up with two
    # separate "Others" rows.
    filter(game.title != "Others") %>%
    arrange(desc(perc_time)) %>%
    select(-cum_perc_time) %>%
    mutate(nrow = row_number())

  to_plot <- too_big_to_plot %>%
    filter(nrow < 8) %>%
    select(game.title, perc_time, time) %>%
    mutate(cum_perc_time = cumsum(perc_time)) %>%
    arrange(cum_perc_time)

  # "Others" is whatever the kept games do not account for, both as a share
  # and in hours (relative to the user's full play time).
  to_plot <- to_plot %>%
    add_row(
      game.title = "Others",
      perc_time = 1.0 - sum(to_plot$perc_time),
      time = sum(user_play_time$time) - sum(to_plot$time),
      cum_perc_time = 1.0
    )
}
# Add the slice label ("<percent>\n<hours>h") and the position of its centre
# along the stacked axis. Rows are sorted by descending game title before the
# cumulative sum so that lab.ypos follows that order.
to_plot <- to_plot %>%
  mutate(
    label = paste(scales::percent(perc_time), paste0(time, "h"), sep = "\n")
  ) %>%
  select(-cum_perc_time) %>%
  arrange(desc(game.title)) %>%
  mutate(lab.ypos = cumsum(perc_time) - perc_time / 2)

to_plot
# TODO use a trick to order the legend

# Pie chart of one user's play time: a single stacked bar wrapped into polar
# coordinates, one slice per row of to_plot.

# Pastel1 only provides between 3 and 9 colours. Clamp the request to that
# range (extra colours are simply unused) and interpolate when there are more
# slices than the palette can supply, so scale_fill_manual() never runs short.
slice_colors <- brewer.pal(n = min(max(nrow(to_plot), 3), 9), name = "Pastel1")
if (nrow(to_plot) > length(slice_colors)) {
  slice_colors <- colorRampPalette(slice_colors)(nrow(to_plot))
}

to_plot %>%
  ggplot(aes(x = "", y = perc_time, fill = game.title)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y") +
  # The smaller the slice, the larger the x offset of its label.
  geom_text(
    aes(x = 0.3 + (1 - perc_time * .6), y = lab.ypos, label = label),
    color = "black"
  ) +
  # Other Brewer palettes tried here: RdBu, Dark2, Pastel2.
  scale_fill_manual(values = slice_colors) +
  labs(
    # pull() returns one id per row of user_play_time; collapse to the
    # distinct id(s) so the title is a single string.
    title = paste(
      "Play Time of User",
      paste(unique(pull(user_play_time, user.id)), collapse = ", ")
    ),
    fill = "Game Titles"
  ) +
  theme_void()

The most bought but NOT played games

# Preview the first rows of the raw behavior log.
head(raw_data)